// ---------------------------------------------------------------------------
// Copyright (C) 2022, Bela contributors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Includes work from abseil-cpp (https://github.com/abseil/abseil-cpp)
// with modifications.
//
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ---------------------------------------------------------------------------
#ifndef BELA_STR_SPLIT_HPP
#define BELA_STR_SPLIT_HPP
#pragma once
#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "ascii.hpp"
#include "__strings/str_split_internal.hpp"

namespace bela {
//------------------------------------------------------------------------------
// Delimiters
//------------------------------------------------------------------------------
//
// `StrSplit()` uses delimiters to define the boundaries between elements in the
// provided input. Several `Delimiter` types are defined below. If a string
// (`const wchar_t*`, `std::wstring`, or `std::wstring_view`) is passed in place
// of an explicit `Delimiter` object, `StrSplit()` treats it the same way as if
// it were passed a `ByString` delimiter.
//
// A `Delimiter` is an object with a `Find()` function that knows how to find
// the first occurrence of itself in a given `std::wstring_view`.
//
// The following `Delimiter` types are available for use within `StrSplit()`:
//
//   - `ByString` (default for string arguments)
//   - `ByChar` (default for a `wchar_t` argument)
//   - `ByAnyChar`
//   - `ByLength`
//   - `MaxSplits`
//
// A Delimiter's `Find()` member function will be passed an input `text` that is
// to be split and a position (`pos`) to begin searching for the next delimiter
// in `text`. The returned std::wstring_view should refer to the next occurrence
// (after `pos`) of the represented delimiter; this returned std::wstring_view
// represents the next location where the input `text` should be broken.
//
// The returned std::wstring_view may be zero-length if the Delimiter does not
// represent a part of the string (e.g., a fixed-length delimiter). If no
// delimiter is found in the input `text`, a zero-length std::wstring_view
// referring to `text.end()` should be returned (e.g.,
// `text.substr(text.size())`). It is important that the returned
// std::wstring_view always be within the bounds of the input `text` given as an
// argument--it must not refer to a string that is physically located outside of
// the given string.
//
// The following example is a simple Delimiter object that is created with a
// single char and will look for that char in the text passed to the `Find()`
// function:
//
//   struct SimpleDelimiter {
//     const wchar_t c_;
//     explicit SimpleDelimiter(wchar_t c) : c_(c) {}
//     std::wstring_view Find(std::wstring_view text, size_t pos) {
//       auto found = text.find(c_, pos);
//       if (found == std::wstring_view::npos)
//         return text.substr(text.size());
//
//       return text.substr(found, 1);
//     }
//   };

// ByString
//
// A sub-string delimiter. If `StrSplit()` is passed a string in place of a
// `Delimiter` object, the string will be implicitly converted into a
// `ByString` delimiter.
//
// Example:
//
//   // Because a string literal is converted to a `bela::ByString`,
//   // the following two splits are equivalent.
//
//   std::vector<std::wstring> v1 = bela::StrSplit(L"a, b, c", L", ");
//
//   using bela::ByString;
//   std::vector<std::wstring> v2 = bela::StrSplit(L"a, b, c",
//                                                ByString(L", "));
//   // v[0] == L"a", v[1] == L"b", v[2] == L"c"
class ByString {
public:
  // Builds a delimiter from the substring `sp`. Explicit, so a string is never
  // silently converted into a delimiter object. Defined out of line.
  explicit ByString(std::wstring_view sp);

  // Delimiter hook used by the splitter: receives the text being split and the
  // offset at which to start looking, and returns the view at which `text`
  // should be broken (see the "Delimiters" overview). Defined out of line.
  std::wstring_view Find(std::wstring_view text, std::size_t pos) const;

private:
  // Owned delimiter text; being `const`, it makes the class non-assignable.
  const std::wstring delimiter_;
};

// ByChar
//
// A single character delimiter. `ByChar` is functionally equivalent to a
// 1-char string within a `ByString` delimiter, but slightly more efficient.
//
// Example:
//
//   // Because a wide char literal is converted to a bela::ByChar,
//   // the following two splits are equivalent.
//   std::vector<std::wstring> v1 = bela::StrSplit(L"a,b,c", L',');
//   using bela::ByChar;
//   std::vector<std::wstring> v2 = bela::StrSplit(L"a,b,c", ByChar(L','));
//   // v[0] == L"a", v[1] == L"b", v[2] == L"c"
//
// `ByChar` is also the default delimiter if a single character is given
// as the delimiter to `StrSplit()`. For example, the following calls are
// equivalent:
//
//   std::vector<std::wstring> v = bela::StrSplit(L"a-b", L'-');
//
//   using bela::ByChar;
//   std::vector<std::wstring> v = bela::StrSplit(L"a-b", ByChar(L'-'));
//
class ByChar {
public:
  // Remembers the single wide character that marks a split point. Explicit, so
  // a bare character is never silently converted into a delimiter object.
  explicit ByChar(wchar_t c) : c_{c} {}

  // Delimiter hook used by the splitter: receives the text being split and the
  // offset at which to start looking, and returns the view at which `text`
  // should be broken (see the "Delimiters" overview). Defined out of line.
  std::wstring_view Find(std::wstring_view text, std::size_t pos) const;

private:
  // The character to split on.
  wchar_t c_;
};

// ByAnyChar
//
// A delimiter that will match any of the given wide characters within
// its provided string.
//
//
// Example:
//
//   using bela::ByAnyChar;
//   std::vector<std::wstring> v = bela::StrSplit(L"a,b=c", ByAnyChar(L",="));
//   // v[0] == L"a", v[1] == L"b", v[2] == L"c"
//
// If `ByAnyChar` is given the empty string, it behaves exactly like
// `ByString` and matches each individual character in the input string.
//
class ByAnyChar {
public:
  // Builds a delimiter from `sp`, the set of characters to split on. Explicit,
  // so a string is never silently converted into this delimiter. Defined out
  // of line.
  explicit ByAnyChar(std::wstring_view sp);

  // Delimiter hook used by the splitter: receives the text being split and the
  // offset at which to start looking, and returns the view at which `text`
  // should be broken (see the "Delimiters" overview). Defined out of line.
  std::wstring_view Find(std::wstring_view text, std::size_t pos) const;

private:
  // Owned set of delimiter characters; being `const`, it makes the class
  // non-assignable.
  const std::wstring delimiters_;
};

// ByLength
//
// A delimiter for splitting into equal-length strings. The length argument to
// the constructor must be greater than 0.
//
// Note: this delimiter works with fixed-width character data, but does not
// work with variable-width encodings, such as UTF-16 text that contains
// surrogate pairs.
//
// Example:
//
//   using bela::ByLength;
//   std::vector<std::wstring> v = bela::StrSplit(L"123456789", ByLength(3));
//
//   // v[0] == L"123", v[1] == L"456", v[2] == L"789"
//
// Note that the string does not have to be a multiple of the fixed split
// length. In such a case, the last substring will be shorter.
//
//   using bela::ByLength;
//   std::vector<std::wstring> v = bela::StrSplit(L"12345", ByLength(2));
//
//   // v[0] == L"12", v[1] == L"34", v[2] == L"5"
class ByLength {
public:
  // `length` is the size of each piece; the class comment requires it to be
  // greater than 0. Explicit, so an integer is never silently converted into
  // a delimiter object. Defined out of line.
  explicit ByLength(std::ptrdiff_t length);

  // Delimiter hook used by the splitter: receives the text being split and the
  // offset at which to start looking, and returns the view at which `text`
  // should be broken (see the "Delimiters" overview). Defined out of line.
  std::wstring_view Find(std::wstring_view text, std::size_t pos) const;

private:
  // Piece length fixed at construction; being `const`, it makes the class
  // non-assignable.
  const std::ptrdiff_t length_;
};

namespace strings_internal {

// A traits-like metafunction for selecting the default Delimiter object type
// for a particular Delimiter type. The base case simply exposes type Delimiter
// itself as the delimiter's Type. However, there are specializations that map
// a `wchar_t` to the ByChar delimiter object and string-like objects to the
// ByString delimiter object. This allows functions like bela::StrSplit() and
// bela::MaxSplits() to accept a character or a string-like object (e.g., L','
// or L", ") as the delimiter argument; it is treated as if the corresponding
// ByChar or ByString delimiter was given.
template <typename T> struct SelectDelimiter {
  // Fallback: a type that is not special-cased below is used unchanged.
  using type = T;
};

// A lone wide character selects the single-character delimiter.
template <> struct SelectDelimiter<wchar_t> {
  using type = ByChar;
};

// C strings (mutable or const), string views and owning strings all select
// the substring delimiter.
template <> struct SelectDelimiter<wchar_t *> {
  using type = ByString;
};
template <> struct SelectDelimiter<const wchar_t *> {
  using type = ByString;
};
template <> struct SelectDelimiter<std::wstring_view> {
  using type = ByString;
};
template <> struct SelectDelimiter<std::wstring> {
  using type = ByString;
};

// Wraps another delimiter and caps the number of matches it may report.
//
// While fewer than `limit` calls have been forwarded, `Find()` delegates to
// the wrapped delimiter. Once the cap has been reached, every later call
// reports "no match" (an empty view positioned at the end of `text`). A
// negative `limit` can never be reached, so the wrapped delimiter is then
// consulted on every call.
template <typename Delimiter> class MaxSplitsImpl {
public:
  // `delimiter` is a by-value sink parameter, so it is moved into the member
  // rather than copied a second time.
  MaxSplitsImpl(Delimiter delimiter, int limit) : delimiter_(std::move(delimiter)), limit_(limit), count_(0) {}

  // Returns the wrapped delimiter's match for (`text`, `pos`), or an empty
  // view at the end of `text` once `limit` calls have been forwarded.
  std::wstring_view Find(std::wstring_view text, size_t pos) {
    // Only count when a cap can actually be hit. Testing before incrementing
    // keeps the cap sticky: the counter stops at `limit_`, so further calls
    // keep reporting "no match" instead of resuming delegation.
    if (limit_ >= 0) {
      if (count_ == limit_) {
        return std::wstring_view(text.data() + text.size(), 0); // No more matches.
      }
      ++count_;
    }
    return delimiter_.Find(text, pos);
  }

private:
  Delimiter delimiter_; // the wrapped delimiter
  const int limit_;     // maximum number of forwarded Find() calls
  int count_;           // calls forwarded so far; never exceeds a non-negative limit_
};

} // namespace strings_internal

// MaxSplits()
//
// A delimiter that limits the number of matches which can occur to the passed
// `limit`. The last element in the returned collection will contain all
// remaining unsplit pieces, which may contain instances of the delimiter.
// The collection will contain at most `limit` + 1 elements.
// Example:
//
//   using bela::MaxSplits;
//   std::vector<std::wstring> v = bela::StrSplit(L"a,b,c", MaxSplits(L',', 1));
//
//   // v[0] == L"a", v[1] == L"b,c"
template <typename Delimiter>
inline strings_internal::MaxSplitsImpl<typename strings_internal::SelectDelimiter<Delimiter>::type>
MaxSplits(Delimiter delimiter, int limit) {
  // Resolve the argument to the delimiter type chosen by SelectDelimiter, then
  // wrap that delimiter in the match-limiting adapter.
  using Selected = typename strings_internal::SelectDelimiter<Delimiter>::type;
  using Limited = strings_internal::MaxSplitsImpl<Selected>;
  return Limited(Selected(delimiter), limit);
}

//------------------------------------------------------------------------------
// Predicates
//------------------------------------------------------------------------------
//
// Predicates filter the results of a `StrSplit()` by determining whether or not
// a resultant element is included in the result set. A predicate may be passed
// as an optional third argument to the `StrSplit()` function.
//
// Predicates are unary functions (or functors) that take a single
// `std::wstring_view` argument and return a bool indicating whether the
// argument should be included (`true`) or excluded (`false`).
//
// Predicates are useful when filtering out empty substrings. By default, empty
// substrings may be returned by `StrSplit()`, which is similar to the way split
// functions work in other programming languages.

// AllowEmpty()
//
// Always returns `true`, indicating that all strings--including empty
// strings--should be included in the split output. This predicate is not
// strictly needed because this is the default behavior of `StrSplit()`;
// however, it might be useful at some call sites to make the intent explicit.
//
// Example:
//
//  std::vector<std::wstring> v = bela::StrSplit(L" a , ,,b,", L',',
//  AllowEmpty());
//
//  // v[0] == L" a ", v[1] == L" ", v[2] == L"", v[3] == L"b", v[4] == L""
struct AllowEmpty {
  // Accepts every piece unconditionally; the argument is never inspected.
  bool operator()(std::wstring_view /*piece*/) const {
    return true;
  }
};

// SkipEmpty()
//
// Returns `false` if the given `std::wstring_view` is empty, indicating that
// `StrSplit()` should omit the empty string.
//
// Example:
//
//   std::vector<std::wstring> v = bela::StrSplit(L",a,,b,", L',', SkipEmpty());
//
//   // v[0] == L"a", v[1] == L"b"
//
// Note: `SkipEmpty()` does not consider a string containing only whitespace
// to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
// predicate.
struct SkipEmpty {
  // Keeps a piece only when it has at least one character.
  bool operator()(std::wstring_view sp) const {
    return sp.size() != 0;
  }
};

// SkipWhitespace()
//
// Returns `false` if the given `std::wstring_view` is empty *or* contains only
// whitespace, indicating that `StrSplit()` should omit the string.
//
// Example:
//
//   std::vector<std::wstring> v = bela::StrSplit(L" a , ,,b,",
//                                               L',', SkipWhitespace());
//   // v[0] == L" a ", v[1] == L"b"
//
//   // SkipEmpty() would return whitespace elements
//   std::vector<std::wstring> v = bela::StrSplit(L" a , ,,b,", L',',
//   SkipEmpty());
//   // v[0] == L" a ", v[1] == L" ", v[2] == L"b"
struct SkipWhitespace {
  // Keeps a piece only when something is left after
  // bela::StripAsciiWhitespace() has been applied to it.
  bool operator()(std::wstring_view sp) const {
    const std::wstring_view trimmed = bela::StripAsciiWhitespace(sp);
    return trimmed.size() != 0;
  }
};

// SFINAE helper: names `int` when `T` is exactly `std::wstring` or
// `const std::wstring`, and is ill-formed for every other type.
template <typename T>
using EnableSplitIfString =
    std::enable_if_t<std::is_same_v<T, std::wstring> || std::is_same_v<T, const std::wstring>, int>;

//------------------------------------------------------------------------------
//                                  StrSplit()
//------------------------------------------------------------------------------

// StrSplit()
//
// Splits a given string based on the provided `Delimiter` object, returning the
// elements within the type specified by the caller. Optionally, you may pass a
// `Predicate` to `StrSplit()` indicating whether to include or exclude the
// resulting element within the final result set. (See the overviews for
// Delimiters and Predicates above.)
//
// Example:
//
//   std::vector<std::wstring> v = bela::StrSplit(L"a,b,c,d", L',');
//   // v[0] == L"a", v[1] == L"b", v[2] == L"c", v[3] == L"d"
//
// You can also provide an explicit `Delimiter` object:
//
// Example:
//
//   using bela::ByAnyChar;
//   std::vector<std::wstring> v = bela::StrSplit(L"a,b=c", ByAnyChar(L",="));
//   // v[0] == L"a", v[1] == L"b", v[2] == L"c"
//
// See above for more information on delimiters.
//
// By default, empty strings are included in the result set. You can optionally
// include a third `Predicate` argument to apply a test for whether the
// resultant element should be included in the result set:
//
// Example:
//
//   std::vector<std::wstring> v = bela::StrSplit(L" a , ,,b,",
//                                               L',', SkipWhitespace());
//   // v[0] == L" a ", v[1] == L"b"
//
// See above for more information on predicates.
//
//------------------------------------------------------------------------------
// StrSplit() Return Types
//------------------------------------------------------------------------------
//
// The `StrSplit()` function adapts the returned collection to the collection
// specified by the caller (e.g. `std::vector` above). The returned collections
// may contain `std::wstring`, `std::wstring_view` (in which case the original
// string being split must ensure that it outlives the collection), or any
// object that can be explicitly created from an `std::wstring_view`. This
// behavior works for:
//
// 1) All standard STL containers including `std::vector`, `std::list`,
//    `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
// 2) `std::pair` (which is not actually a container). See below.
//
// Example:
//
//   // The results are returned as `std::wstring_view` objects. Note that we
//   // have to ensure that the input string outlives any results.
//   std::vector<std::wstring_view> v = bela::StrSplit(L"a,b,c", L',');
//
//   // Stores results in a std::set<std::wstring>, which also performs
//   // de-duplication and orders the elements in ascending order.
//   std::set<std::wstring> a = bela::StrSplit(L"b,a,c,a,b", L',');
//   // a == {L"a", L"b", L"c"}
//
//   // `StrSplit()` can be used within a range-based for loop, in which case
//   // each element will be of type `std::wstring_view`.
//   std::vector<std::wstring> v;
//   for (const auto sv : bela::StrSplit(L"a,b,c", L',')) {
//     if (sv != L"b") v.emplace_back(sv);
//   }
//   // v[0] == L"a", v[1] == L"c"
//
//   // Stores results in a map. The map implementation assumes that the input
//   // is provided as a series of key/value pairs. For example, the 0th element
//   // resulting from the split will be stored as a key to the 1st element. If
//   // an odd number of elements are resolved, the last element is paired with
//   // a default-constructed value (e.g., empty string).
//   std::map<std::wstring, std::wstring> m = bela::StrSplit(L"a,b,c", L',');
//   // m[L"a"] == L"b", m[L"c"] == L""     // last component value equals L""
//
// Splitting to `std::pair` is an interesting case because it can hold only two
// elements and is not a collection type. When splitting to a `std::pair` the
// first two split strings become the `std::pair` `.first` and `.second`
// members, respectively. The remaining split substrings are discarded. If there
// are fewer than two split substrings, the empty string is used for the
// corresponding `std::pair` member.
//
// Example:
//
//   // Stores first two split strings as the members in a std::pair.
//   std::pair<std::wstring, std::wstring> p = bela::StrSplit(L"a,b,c", L',');
//   // p.first == L"a", p.second == L"b"       // L"c" is omitted.
//
// The `StrSplit()` function can be used multiple times to perform more
// complicated splitting logic, such as intelligently parsing key-value pairs.
//
// Example:
//
//   // The input string L"a=b=c,d=e,f=,g" becomes
//   // { L"a" => L"b=c", L"d" => L"e", L"f" => L"", L"g" => L"" }
//   std::map<std::wstring, std::wstring> m;
//   for (std::wstring_view sp : bela::StrSplit(L"a=b=c,d=e,f=,g", L',')) {
//     m.insert(bela::StrSplit(sp, bela::MaxSplits(L'=', 1)));
//   }
//   EXPECT_EQ(L"b=c", m.find(L"a")->second);
//   EXPECT_EQ(L"e", m.find(L"d")->second);
//   EXPECT_EQ(L"", m.find(L"f")->second);
//   EXPECT_EQ(L"", m.find(L"g")->second);
//
// WARNING: Due to a legacy bug that is maintained for backward compatibility,
// splitting the following empty string_views produces different results:
//
//   bela::StrSplit(std::wstring_view(L""), L'-');  // {L""}
//   bela::StrSplit(std::wstring_view(), L'-');     // {}, but should be {L""}
//
// Try not to depend on this distinction because the bug may one day be fixed.
template <typename Delimiter>
strings_internal::Splitter<typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, std::wstring_view>
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
  // View-based overload without a predicate: every piece, including empty
  // ones, is accepted via AllowEmpty.
  using Selected = typename strings_internal::SelectDelimiter<Delimiter>::type;
  using Result = strings_internal::Splitter<Selected, AllowEmpty, std::wstring_view>;
  return Result(text.value(), Selected(d), AllowEmpty());
}

// Overload selected when `text` is an rvalue `std::wstring` (or
// `const std::wstring`): EnableSplitIfString rejects every other deduction,
// including lvalues. The splitter is instantiated with `std::wstring` as its
// text type instead of a view, and `text` is handed over with std::move.
// No predicate is given, so AllowEmpty accepts every piece.
template <typename Delimiter, typename StringType, EnableSplitIfString<StringType> = 0>
strings_internal::Splitter<typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, std::wstring>
StrSplit(StringType &&text, Delimiter d) {
  using Selected = typename strings_internal::SelectDelimiter<Delimiter>::type;
  using Result = strings_internal::Splitter<Selected, AllowEmpty, std::wstring>;
  return Result(std::move(text), Selected(d), AllowEmpty());
}

// View-based overload with a caller-supplied predicate `p`, which is moved
// into the splitter and decides which pieces are kept.
template <typename Delimiter, typename Predicate>
strings_internal::Splitter<typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, std::wstring_view>
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, Predicate p) {
  using Selected = typename strings_internal::SelectDelimiter<Delimiter>::type;
  using Result = strings_internal::Splitter<Selected, Predicate, std::wstring_view>;
  return Result(text.value(), Selected(d), std::move(p));
}

// Overload selected when `text` is an rvalue `std::wstring` (or
// `const std::wstring`) and a predicate is supplied. The splitter is
// instantiated with `std::wstring` as its text type instead of a view; both
// `text` and the predicate `p` are handed over with std::move.
template <typename Delimiter, typename Predicate, typename StringType, EnableSplitIfString<StringType> = 0>
strings_internal::Splitter<typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, std::wstring>
StrSplit(StringType &&text, Delimiter d, Predicate p) {
  using Selected = typename strings_internal::SelectDelimiter<Delimiter>::type;
  using Result = strings_internal::Splitter<Selected, Predicate, std::wstring>;
  return Result(std::move(text), Selected(d), std::move(p));
}

} // namespace bela

#endif
